Small RNAseq: Differential Expression Analysis
Environment Setup
All programs needed for the analyses can be setup using conda-env.yml file.
salloc -N 1 --exclusive -p amd -t 8:00:00
conda env create -f conda-env.yml
conda activate smallrnaDownloading datasets
Raw data
Raw data was downloaded from the sequencing facility using the secure
link, with wget command. The downloaded files were checked
for md5sum and compared against list of files expected as per the input
samples provided. The files are now available on NCBI GEO database (GSE222855),
and can be downloaded using SRA-toolkit or
enaBrowserTools.
wget https://oc1.rnet.missouri.edu/xyxz # link masked
paste <(ls *_L001_R1_001.fastq.gz) <(ls *_L002_R1_001.fastq.gz) | \
sed 's/\t/ /g' |\
awk '{print "cat",$1,$2" > "$1}' |\
sed 's/_L001_R1_001.fastq.gz/.fq.gz/2' > concatenate.sh
chmod +x concatenate.sh
sh concatenate.shGenome/annotation
Additional files required for the analyses were downloaded from GenCode. The downloaded files are as follows:
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M30/GRCm39.primary_assembly.genome.fa.gz
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M30/gencode.vM30.annotation.gff3.gz
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M30/gencode.vM30.annotation.gtf.gz
gunzip GRCm39.primary_assembly.genome.fa.gz
gunzip gencode.vM30.annotation.gff3.gz
gunzip gencode.vM30.annotation.gtf.gzFastQC (before processing)
for fq in *.fq.gz; do
fastqc --threads $SLURM_JOB_CPUS_PER_NODE $fq;
done
mkdir -p fastqc_pre
mv *.zip *.html fastqc_pre/Mapping
To index the genome, following command was run (in an interactive session).
fastaGenome="GRCm39.genome.fa"
gtf="gencode.vM30.annotation.gtf"
STAR --runThreadN $SLURM_JOB_CPUS_PER_NODE \
--runMode genomeGenerate \
--genomeDir $(pwd) \
--genomeFastaFiles $fastaGenome \
--sjdbGTFfile $gtf \
--sjdbOverhang 1Each fastq file was mapped to the indexed genome as
using runSTAR_map.sh script shown below:
#!/bin/bash
read1=$1
out=$(basename ${read1%%.*})
STARgenomeDir=$(pwd)
# illumina adapter
adapterseq="AGATCGGAAGAGC"
STAR \
--genomeDir ${STARgenomeDir} \
--readFilesIn ${read1} \
--outSAMunmapped Within \
--readFilesCommand zcat \
--outSAMtype BAM SortedByCoordinate \
--quantMode GeneCounts \
--outFilterMultimapNmax 20 \
--clip3pAdapterSeq ${adapterseq} \
--clip3pAdapterMMp 0.1 \
--outFilterMismatchNoverLmax 0.03 \
--outFilterScoreMinOverLread 0 \
--outFilterMatchNminOverLread 0 \
--outFilterMatchNmin 16 \
--outFileNamePrefix ${out} \
--alignSJDBoverhangMin 1000 \
--alignIntronMax 1 \
--runThreadN ${SLURM_JOB_CPUS_PER_NODE} \
--genomeLoad LoadAndKeep \
--limitBAMsortRAM 30000000000 \
--outSAMheaderHD "@HD VN:1.4 SO:coordinate"Mapping was run with a simple loop:
for fq in *.fq.gz; do
runSTAR_map.sh $fq;
doneCounting Stats
Counts generated by STAR with option
--quantMode GeneCounts were parsed to generate summary
stats as well as to extract annotated small RNA feature counts.
mkdir -p counts_files
# copy counts for each sample
cp *ReadsPerGene.out.tab counts_files/
cd counts_files
# merge counts
join_files.sh *ReadsPerGene.out.tab |\
sed 's/ReadsPerGene.out.tab//g' |\
grep -v "^N_" > counts_star.tsv
# merge stats
join_files.sh *ReadsPerGene.out.tab |\
sed 's/ReadsPerGene.out.tab//g' |\
head -n 1 > summary_star.tsv
join_files.sh *ReadsPerGene.out.tab |\
sed 's/ReadsPerGene.out.tab//g' |\
grep "^N_" >> summary_star.tsv
# parse GTF to extact gene.id and its biotype:
gtf=gencode.vM30.annotation.gtf
awk 'BEGIN{OFS=FS="\t"} $3=="gene" {split($9,a,";"); print a[1],a[2]}' ${gtf} |\
awk '{print $4"\t"$2}' |\
sed 's/"//g' > GeneType_GeneID.tsv
cut -f 1 GeneType_GeneID.tsv | sort |uniq > features.txtThe information for biotype as provided by the gencodegenes
were used for categorizing biotype.
The smallRNA group consists of following
biotype:
miRNA
misc_RNA
scRNA
snRNA
snoRNA
sRNA
scaRNA
The full table is as follows:
library(knitr)
setwd("/work/LAS/geetu-lab/arnstrm/mouse.trophoblast.smallRNAseq")
file1="assets/GeneType_Group.tsv"
info <-
read.csv(
file1,
header = TRUE,
sep = "\t",
stringsAsFactors = TRUE
)
kable(info, caption = "Table 1: biotype and its groupings")| biotype | group |
|---|---|
| protein_coding | coding_genes |
| pseudogene | pseudogenes |
| TR_C_gene | Ig_genes |
| TR_D_gene | Ig_genes |
| TR_J_gene | Ig_genes |
| TR_V_gene | Ig_genes |
| IG_C_gene | Ig_genes |
| IG_D_gene | Ig_genes |
| IG_J_gene | Ig_genes |
| IG_LV_gene | Ig_genes |
| IG_V_gene | Ig_genes |
| TR_J_pseudogene | pseudogenes |
| TR_V_pseudogene | pseudogenes |
| IG_C_pseudogene | pseudogenes |
| IG_D_pseudogene | pseudogenes |
| IG_pseudogene | pseudogenes |
| IG_V_pseudogene | pseudogenes |
| lncRNA | long_non_conding_RNA |
| miRNA | non_conding_RNA |
| misc_RNA | non_conding_RNA |
| ribozyme | non_conding_RNA |
| rRNA | non_conding_RNA |
| scaRNA | non_conding_RNA |
| scRNA | non_conding_RNA |
| snoRNA | non_conding_RNA |
| snRNA | non_conding_RNA |
| sRNA | non_conding_RNA |
| Mt_rRNA | non_conding_RNA |
| Mt_tRNA | non_conding_RNA |
| processed_pseudogene | pseudogenes |
| unprocessed_pseudogene | pseudogenes |
| translated_unprocessed_pseudogene | pseudogenes |
| transcribed_processed_pseudogene | pseudogenes |
| transcribed_unitary_pseudogene | pseudogenes |
| transcribed_unprocessed_pseudogene | pseudogenes |
| unitary_pseudogene | pseudogenes |
| TEC | unconfirmed_genes |
A samples table (samples.tsv) categorizing samples to
its condition were also generated:
library(tidyverse)
file2="assets/samples.tsv"
samples <-
read.csv(
file2,
header = TRUE,
sep = "\t",
stringsAsFactors = TRUE
)
samples$newname <- "NA"
samples$rep <- gsub("_$", ")", gsub("^_", "(rep", str_extract(samples$Sample,"_[0-9]_")))
samples$newname[which(str_detect(samples$Sample, "Dif_D6"))] <- "pTGC"
samples$newname[which(str_detect(samples$Sample, "Undif_D2"))] <- "TSC"
samples$Names <- paste0(samples$newname, samples$rep, sep = " ")
samples <- samples %>% select(Sample, Group, Names)
kable(samples, caption = "Table 2: Samples in the study")| Sample | Group | Names |
|---|---|---|
| Dif_D6_1_S4 | Diff | pTGC(rep1) |
| Dif_D6_2_S3 | Diff | pTGC(rep2) |
| Dif_D6_3_S2 | Diff | pTGC(rep3) |
| Dif_D6_4_S1 | Diff | pTGC(rep4) |
| Undif_D2_1_S8 | Undf | TSC(rep1) |
| Undif_D2_2_S7 | Undf | TSC(rep2) |
| Undif_D2_3_S6 | Undf | TSC(rep3) |
| Undif_D2_4_S5 | Undf | TSC(rep4) |
This information was then merged withe counts table to generate QC plots:
awk 'BEGIN{OFS=FS="\t"}FNR==NR{a[$1]=$2;next}{ print $2,$1,a[$1]}' \
GeneType_Group.tsv GeneType_GeneID.tsv > GeneID_GeneType_Group.tsv
awk 'BEGIN{OFS=FS="\t"}FNR==NR{a[$1]=$2"\t"$3;next}{print $1,a[$1],$0}' \
GeneID_GeneType_Group.tsv counts_star.tsv |\
cut -f 1-3,5- > processed_counts_star.tsvPlotting the mapping summary and count statistics for various biotypes:
library(scales)
library(tidyverse)
library(plotly)setwd("/work/LAS/geetu-lab/arnstrm/mouse.trophoblast.smallRNAseq")
file1="assets/processed_counts_star.tsv"
file2="assets/summary_stats_star.tsv"
counts <-
read.csv(
file1,
sep = "\t",
stringsAsFactors = TRUE
)
subread <-
read.csv(
file2,
sep = "\t",
stringsAsFactors = TRUE
)
# convert long format
counts.long <- gather(counts, Sample, Count, Dif_D6_1_S4:Undif_D2_4_S5, factor_key=TRUE)
subread.long <- gather(subread, Sample, Count, Dif_D6_1_S4:Undif_D2_4_S5, factor_key=TRUE)
# organize
counts.long$Group <-
factor(
counts.long$Group,
levels = c(
"coding_genes",
"non_conding_RNA",
"long_non_conding_RNA",
"pseudogenes",
"unconfirmed_genes",
"Ig_genes"
)
)
counts.labeled <- merge(x = counts.long, y = samples, by = "Sample")
subread.long$Assignments <-
factor(
subread.long$Assignments,
levels = c(
"N_input",
"N_unmapped",
"N_multimapping",
"N_unique",
"N_ambiguous",
"N_noFeature"
)
)
subread.labeled <- merge(x = subread.long, y = samples, by = "Sample")ggplot(subread.labeled, aes(x = Assignments, y = Count, fill = Assignments)) +
geom_bar(stat = 'identity') +
labs(x = "Subread assingments", y = "reads") + theme_minimal() +
scale_y_continuous(labels = label_comma()) +
scale_fill_manual(
values = c(
"N_input" = "#577b92",
"N_unmapped" = "#021927",
"N_multimapping" = "#007caa",
"N_unique" = "#1a3c54",
"N_ambiguous" = "#3a5b63",
"N_noFeature" = "#33526c"
)
) +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
hjust = 1,
size = 12
),
strip.text = element_text(
face = "bold",
color = "gray35",
hjust = 0,
size = 10
),
strip.background = element_rect(fill = "white", linetype = "blank"),
legend.position = "none"
) +
facet_wrap("Names", scales = "free_y", ncol = 4)
Figure 1: STAR read mapping and feature assignment. Here,
N_input is total input reads, N_unmapped is
reads that were either too short to map after adapter removal or had
higher mismatch rate to place reliably on the genome,
N_multimapping is reads mapped to multiple loci,
N_unique is reads mapped to unique loci. A subset of
N_unique reads that were unable to clearly assign to a
feature or assign any feature at all are grouped as
N_ambigious or N_noFeature, respectively
g <- ggplot(counts.labeled, aes(x = Group.x, y = Count, fill = Group.x)) +
geom_bar(stat = 'sum') +
labs(x = "biotype", y = "read counts") + theme_minimal() +
scale_y_continuous(labels = label_comma()) +
scale_fill_manual(values = c(
"coding_genes" = "#92b5b7",
"non_conding_RNA" = "#be4f54",
"long_non_conding_RNA" = "#eca87a",
"pseudogenes" = "#784440",
"unconfirmed_genes" = "#eba3a4",
"Ig_genes" = "#8a3a1b"
)) +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
hjust = 1,
size = 12
),
strip.text = element_text(
face = "bold",
color = "gray35",
hjust = 0,
size = 10
),
strip.background = element_rect(fill = "white", linetype = "blank"),
legend.position = "none"
) +
facet_wrap("Names", scales = "free_y", ncol = 4)
gFigure 2: Features with read counts
counts.nc <- filter(counts.long, Group %in% "non_conding_RNA" )
counts.nc.labeled <- merge(x = counts.nc, y = samples, by = "Sample")
counts.nc.labeled$GeneType <-
factor(
counts.nc$GeneType,
levels = c(
"miRNA",
"misc_RNA",
"snoRNA",
"snRNA",
"sRNA",
"scRNA",
"scaRNA",
"Mt_tRNA",
"Mt_rRNA",
"rRNA",
"ribozyme"
)
)
g <-
ggplot(counts.nc.labeled, aes(x = GeneType, y = Count, fill = GeneType)) +
geom_bar(stat = 'sum') +
labs(x = "biotype", y = "read counts") + theme_minimal() +
scale_y_continuous(labels = label_comma()) +
scale_fill_manual(
values = c(
'miRNA' = '#54693e',
'misc_RNA' = '#9c47cb',
'snRNA' = '#94d14f',
'snoRNA' = '#5c4f9c',
'scaRNA' = '#cca758',
'sRNA' = '#c85a90',
'Mt_tRNA' = '#80d0a8',
'Mt_rRNA' = '#c4533b',
'rRNA' = '#9ea5c0',
'ribozyme' = '#51333c'
)
) +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
hjust = 1,
size = 12
),
strip.text = element_text(
face = "bold",
color = "gray35",
hjust = 0,
size = 10
),
strip.background = element_rect(fill = "white", linetype = "blank"),
legend.position = "none"
) +
facet_wrap("Names", scales = "free_y", ncol = 4)
#ggplotly(g)
gFigure 3: non-coding biotype read counts
subset the counts file to select only smallRNA genes
snrna <- c('miRNA',
'misc_RNA',
'scRNA',
'snRNA',
'snoRNA',
'sRNA',
'scaRNA')
cts <- dplyr::filter(counts, GeneType %in% snrna) %>%
dplyr::select(Geneid, Dif_D6_1_S4:Undif_D2_4_S5)
write_delim(cts, file = "assets/noncoding_counts_star.tsv", delim = "\t")This noncoding_counts_star.tsv and
samples.tsv file will be used for DESeq2
analyses.
DESeq2
For the next steps, we used DESeq2 for performing the DE
analyses. Results were visualized as volcano plots and tables were
exported to excel.
Load packages
setwd("/work/LAS/geetu-lab/arnstrm/mouse.trophoblast.smallRNAseq")
library(DESeq2)
library(RColorBrewer)
library(pheatmap)
library(genefilter)
library(ggrepel)
library(biomaRt)
library(reshape2)
library(PupillometryR)
library(ComplexUpset)
library(splitstackshape)Import counts and sample metadata
The counts data and its associated metadata
(coldata) are imported for analyses.
counts = 'assets/noncoding_counts_star.tsv'
groupFile = 'assets/samples.tsv'
coldata <-
read.csv(
groupFile,
row.names = 1,
sep = "\t",
stringsAsFactors = TRUE
)
cts <- as.matrix(read.csv(counts, sep = "\t", row.names = "Geneid"))
Reorder columns of cts according to coldata
rows. Check if samples in both files match.
colnames(cts)
#> [1] "Dif_D6_1_S4" "Dif_D6_2_S3" "Dif_D6_3_S2" "Dif_D6_4_S1"
#> [5] "Undif_D2_1_S8" "Undif_D2_2_S7" "Undif_D2_3_S6" "Undif_D2_4_S5"
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]Normalize
The batch corrected read counts are then used for running DESeq2 analyses
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ Group)
vsd <- vst(dds, blind = FALSE, nsub =500)
keep <- rowSums(counts(dds)) >= 5
dds <- dds[keep, ]
dds <- DESeq(dds)
dds
#> class: DESeqDataSet
#> dim: 1266 8
#> metadata(1): version
#> assays(4): counts mu H cooks
#> rownames(1266): ENSMUSG00000119106.1 ENSMUSG00000119589.1 ...
#> ENSMUSG00000065444.3 ENSMUSG00000077869.3
#> rowData names(22): baseMean baseVar ... deviance maxCooks
#> colnames(8): Dif_D6_1_S4 Dif_D6_2_S3 ... Undif_D2_3_S6 Undif_D2_4_S5
#> colData names(2): Group sizeFactorvst <- assay(vst(dds, blind = FALSE, nsub = 500))
vsd <- vst(dds, blind = FALSE, nsub = 500)
pcaData <-
plotPCA(vsd,
intgroup = "Group",
returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))PCA plot for QC
PCA plot for the dataset. We export the PCs and save them as a data frame to plot.
rv <- rowVars(assay(vsd))
select <-
order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(vsd)[select, ]))
percentVar <- pca$sdev ^ 2 / sum(pca$sdev ^ 2)
intgroup = "Group"
intgroup.df <- as.data.frame(colData(vsd)[, intgroup, drop = FALSE])
group <- if (length(intgroup) == 1) {
factor(apply(intgroup.df, 1, paste, collapse = " : "))
}
d <- data.frame(
PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
intgroup.df,
name = colnames(vsd)
)
# rename
d$sample <- "NA"
d$rep <- gsub("^_", "(rep", str_extract(d$name,"_[0-9]_"))
d$rep <- gsub("_$", ")", d$rep)
d$sample[which(str_detect(rownames(d), "Dif_D6"))] <- "pTGC"
d$sample[which(str_detect(rownames(d), "Undif_D2"))] <- "TSC"
d$name <- paste0(d$sample, d$rep, sep = " ")plot PCA for components 1 and 2
#nudge <- position_nudge(y = 0.5)
g <- ggplot(d, aes(PC1, PC2, color = sample)) +
scale_shape_manual(values = 1:8) +
scale_color_manual(values = c('pTGC' = '#c6007b',
'TSC' = '#a0b600')) +
theme_bw() +
theme(legend.title = element_blank()) +
geom_point(size = 2, stroke = 2) +
geom_text_repel(aes(label = name)) +
xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))
gFigure 4: PCA plot for the first 2 principal components
Sample distance for QC
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- samples$Names
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
col = colors)Figure 5: Euclidean distance between samples
Set contrasts and find DE genes
resultsNames(dds)
#> [1] "Intercept" "Group_Undf_vs_Diff"
res.UndfvsDiff <- results(dds, contrast = c("Group", "Undf", "Diff"))
table(res.UndfvsDiff$padj < 0.05)
#>
#> FALSE TRUE
#> 579 294
res.UndfvsDiff <- res.UndfvsDiff[order(res.UndfvsDiff$padj),]
res.UndfvsDiffdata <-
merge(
as.data.frame(res.UndfvsDiff),
as.data.frame(counts(dds, normalized = TRUE)),
by = "row.names",
sort = FALSE
)Creating gene lists
The gene lists have Ensembl gene-ID-version. We need
mirbase_id, gene_biotype and
external_gene_name attached to make the results
interpretable. So we will download metadata from ensembl using
biomaRt package.
ensembl = useMart("ENSEMBL_MART_ENSEMBL")
ensembl = useDataset("mmusculus_gene_ensembl", mart = ensembl)
filterType <- "ensembl_gene_id_version"
filterValues <- rownames(cts)
attributeNames <- c('ensembl_gene_id_version',
'gene_biotype',
'mirbase_id',
'external_gene_name')
annot <- getBM(
attributes = attributeNames,
filters = filterType,
values = filterValues,
mart = ensembl
)
isDup <- duplicated(annot$ensembl_gene_id)
dup <- annot$ensembl_gene_id[isDup]
annot <- annot[!annot$ensembl_gene_id %in% dup, ] #this object will be saved and used laterVolcano plots
volcanoPlots2 <-
function(res.se,
string,
first,
second,
color1,
color2,
color3,
ChartTitle) {
res.se <- res.se[order(res.se$padj), ]
res.se <-
rownames_to_column(as.data.frame(res.se[order(res.se$padj), ]))
names(res.se)[1] <- "Gene"
res.data <-
merge(res.se,
annot,
by.x = "Gene",
by.y = "ensembl_gene_id_version")
res.data <- res.data %>% mutate_all(na_if, "")
res.data <- res.data %>% mutate_all(na_if, " ")
res.data <-
res.data %>% mutate(gene_symbol = coalesce(external_gene_name, Gene))
fc=1.5
log2fc=log(fc, base = 2)
neg.log2fc = log2fc * -1
res.data$diffexpressed <- "other.genes"
res.data$diffexpressed[res.data$log2FoldChange >= log2fc &
res.data$padj <= 0.05] <- "Higher expression in TSC"
res.data$diffexpressed[res.data$log2FoldChange <= neg.log2fc &
res.data$padj <= 0.05] <- "Higher expression in pTGC"
res.data$delabel <- ""
res.data$delabel[res.data$log2FoldChange >= log2fc
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange >= log2fc
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
res.data$delabel[res.data$log2FoldChange <= neg.log2fc
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange <= neg.log2fc
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
ggplot(res.data,
aes(
x = log2FoldChange,
y = -log10(padj),
col = diffexpressed,
label = delabel
)) +
geom_point(alpha = 0.5) +
xlim(-20, 20) +
theme_classic() +
scale_color_manual(name = "Expression", values = c(color1, color2, color3)) +
# geom_text_repel(
# data = subset(res.data, padj <= 0.05),
# max.overlaps = 15,
# show.legend = F,
# min.segment.length = Inf,
# seed = 42,
# box.padding = 0.5
# ) +
ggtitle(ChartTitle) +
xlab(paste("log2 fold change")) +
ylab("-log10 pvalue (adjusted)") +
theme(legend.text.align = 0)
}g <- volcanoPlots2(
res.UndfvsDiff,
"UndfvsDiff",
"Undf",
"Diff",
"#c6007b",
"#a0b600",
"grey",
ChartTitle = "TSC vs. pTGC"
)
ggplotly(g)Figure 6: Volcano plot showing genes overexpressed in undifferentiated and differentiated states.
Heatmap
Heatmap for the top 30 variable genes:
topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 30)
mat <- assay(vsd)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
mat2 <- merge(mat,
annot,
by.x = 'row.names',
by.y = "ensembl_gene_id_version")
mat2$gene <- paste0(mat2$external_gene_name," (",mat2$gene_biotype,")")
rownames(mat2) <- mat2[,'gene']
mat2 <- mat2[2:9]
colnames(mat2) <- samples$Names
heat_colors <- brewer.pal(9, "YlOrRd")
g <- pheatmap(
mat2,
color = heat_colors,
main = "Top 30 variable small RNA genes",
cluster_rows = T,
cluster_cols = T,
show_rownames = T,
border_color = NA,
fontsize = 10,
scale = "row",
fontsize_row = 10
)
gFigure 7: Heat map for top 30 variable small RNA genes
Write result tables
Here, we will attach the mirbase_id,
gene_biotype and external_gene_name downloaded
in the previous section to the results table. We will also filter the
the table to write:
- DE list that have
padjless than or equal to0.05 - DE list that have
padjless than or equal to0.05, andlog2FoldChangegreater than or equal to fold change of1.5 - DE list that have
padjless than or equal to0.05, andlog2FoldChangeless than or equal to fold change of-1.5 - Full list of DE table without any filtering.
names(res.UndfvsDiffdata)[1] <- "ensembl_gene_id_version"
res.UndfvsDiffdata <-
merge(x = res.UndfvsDiffdata,
y = annot,
by = "ensembl_gene_id_version",
all.x = TRUE)
res.UndfvsDiffdata <-
res.UndfvsDiffdata[, c(1,
(ncol(res.UndfvsDiffdata) - 2):ncol(res.UndfvsDiffdata),
2:(ncol(res.UndfvsDiffdata) - 3))]
res.UndfvsDiffSig <- res.UndfvsDiffdata %>%
filter(padj <= 0.05)
fc = 1.5
log2fc = log(fc, base = 2)
neg.log2fc = log2fc * -1
res.UndfvsDiffSig.up <- res.UndfvsDiffdata %>%
filter(padj <= 0.05 & log2FoldChange >= log2fc)
res.UndfvsDiffSig.dw <- res.UndfvsDiffdata %>%
filter(padj <= 0.05 & log2FoldChange <= neg.log2fc)
write_delim(res.UndfvsDiffdata,
file = "results/DESeq2_results_UndfvsDiff_full-table.tsv",
delim = "\t")
write_delim(res.UndfvsDiffSig,
file = "results/DESeq2_results_UndfvsDiff_adj.p-le-0.05.tsv",
delim = "\t")
write_delim(res.UndfvsDiffSig.up,
file = "results/DESeq2_results_UndfvsDiff_adj.p-le-0.05_fc-ge-1.5.tsv",
delim = "\t")
write_delim(res.UndfvsDiffSig.dw,
file = "results/DESeq2_results_UndfvsDiff_adj.p-le-0.05_fc-le-neg1.5.tsv",
delim = "\t")Characterizing smallRNA expression
To check the expression of genes in each condition, we looked at the highly expressed genes and their composition. The analyses is shown below.
exp <- res.UndfvsDiffdata[, c(1:4, 11:ncol(res.UndfvsDiffdata))]
exp$external_gene_name <-
ifelse(exp$external_gene_name == "",
exp$ensembl_gene_id_version,
exp$external_gene_name)
exp$gene <- paste0(exp$external_gene_name, "(", exp$gene_biotype, ")")
renamed.exp <- exp[, c(13, 1:12)]
renamed.exp.long <-
melt(
renamed.exp,
id.vars = c(
'gene',
'ensembl_gene_id_version',
'gene_biotype',
'mirbase_id',
'external_gene_name'
)
)
colnames(renamed.exp.long) <-
c('gene',
'ensembl_gene_id_version',
'gene_biotype',
'mirbase_id',
'external_gene_name',
'replicates',
'norm.expression'
)
renamed.exp.long$condition <- "NA"
renamed.exp.long$condition[which(str_detect(renamed.exp.long$replicates, "Dif_D6"))] <-
"pTGC"
renamed.exp.long$condition[which(str_detect(renamed.exp.long$replicates, "Undif_D2"))] <-
"TSC"
renamed.exp.long <-
renamed.exp.long %>% filter(norm.expression > 0)
renamed.exp.long <- na.omit(renamed.exp.long)
temp <-
merge(x = renamed.exp.long,
y = samples,
by.x = "replicates",
by.y = "Sample")
temp$replicates <- temp$Names
renamed.exp.long <- temp %>%
dplyr::select(replicates:condition)
# SOURCE: https://ourcodingclub.github.io/tutorials/dataviz-beautification/
theme_niwot <- function() {
theme_bw() +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 12),
axis.line.x = element_line(color = "black"),
axis.line.y = element_line(color = "black"),
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = unit(c(1, 1, 1, 1), units = , "cm"),
plot.title = element_text(
size = 12,
vjust = 1,
hjust = 0
),
legend.text = element_text(size = 12),
legend.title = element_blank(),
legend.position = c(0.95, 0.15),
legend.key = element_blank(),
legend.background = element_rect(
color = "black",
fill = "transparent",
size = 2,
linetype = "blank"
)
)
}Normalized expression plots
vlnPlot <-
function(dataIn = renamed.exp.long,
xcol = "replicates",
fillcol = "condition",
expre = "norm.expression") {
p <- ggplot(data = dataIn,
aes_string(x = xcol, y = expre, fill = fillcol)) +
geom_flat_violin(position = position_nudge(x = 0.2, y = 0),
alpha = 0.6,
trim = FALSE) +
geom_point(
aes_string(y = expre, color = fillcol),
position = position_jitter(width = 0.15),
size = 1,
alpha = 0.5
) +
geom_boxplot(width = 0.2,
outlier.shape = NA,
alpha = 0.8) + stat_summary(
fun = mean,
geom = "point",
shape = 23,
size = 2
) +
labs(y = "Normalized Expression", x = NULL) +
guides(fill = "none", color = "none") +
scale_y_log10(label = comma) +
scale_fill_manual(values = c('pTGC' = '#c6007b',
'TSC' = '#a0b600')) +
scale_color_manual(values = c('pTGC' = '#c6007b',
'TSC' = '#a0b600')) +
theme_niwot() + theme(axis.text.x = element_text(
angle = 45,
vjust = 1,
hjust = 1
))
p
}Expression plots
Replicates
vlnPlot(xcol="replicates")Figure 8A: Normalized expression of genes in undifferentiated and differentiated samples (replicate)
Conditions
vlnPlot(xcol="condition")Figure 8B: Normalized expression of genes in undifferentiated and differentiated samples (condition)
Biotypes
vlnPlot(xcol="gene_biotype")Figure 8C: Normalized expression of genes in undifferentiated and differentiated samples (split based on gene biotype)
Highly expressed small RNAs
df.diff <- renamed.exp %>%
rowwise() %>%
mutate(Diff = mean(c(
Dif_D6_1_S4, Dif_D6_2_S3, Dif_D6_3_S2, Dif_D6_4_S1
))) %>%
dplyr::select(gene:external_gene_name, Diff) %>%
dplyr::filter(Diff > 0) %>%
ungroup() %>%
mutate(quart = ntile(Diff, 4)) %>%
mutate(decile = ntile(Diff, 10))
df.undiff <- renamed.exp %>%
rowwise() %>%
mutate(Undiff = mean(c(
Undif_D2_1_S8, Undif_D2_2_S7, Undif_D2_3_S6, Undif_D2_4_S5
))) %>%
dplyr::select(gene:external_gene_name, Undiff) %>%
dplyr::filter(Undiff > 0) %>%
ungroup() %>%
mutate(quart = ntile(Undiff, 4)) %>%
mutate(decile = ntile(Undiff, 10))
filterCuts <- function(dataIn = df.undiff,
cutOff = 4,
type = decile) {
type <- enquo(type)
dataIn %>%
dplyr::filter(!!type == cutOff) %>%
dplyr::select(ensembl_gene_id_version)
}
undiff.75pc <- filterCuts(df.undiff, 4, type = quart)
diff.75pc <- filterCuts(df.diff, 4, type = quart)
undiff.90pc <- filterCuts(df.undiff, 10, type = decile)
diff.90pc <- filterCuts(df.diff, 10, type = decile)
dataSlice <- function(dataA = diff.75pc, dataB = undiff.75pc) {
partA <- dataA %>% column_to_rownames(var = "ensembl_gene_id_version")
partA$colA = 1
partB <-
dataB %>% column_to_rownames(var = "ensembl_gene_id_version")
partB$colB = 1
partAB <- merge(partA,
partB,
by = 0,
all.x = TRUE,
all.y = TRUE)
partAB[is.na(partAB)] <- 0
partAB <-
merge(
x = partAB,
y = annot,
by.x = "Row.names",
by.y = "ensembl_gene_id_version",
all.x = TRUE
)
partAB <- partAB %>% dplyr::select(Row.names:gene_biotype)
partAB <- partAB %>% column_to_rownames(var = "Row.names")
return(partAB)
}
quartile.data <- dataSlice(diff.75pc, undiff.75pc)
colnames(quartile.data) <-
c("pTGC.UpperQuartile", "TSC.UpperQuartile", "gene_biotype")
plotUpSet <- function(fulltable = quartile.data) {
inter <- colnames(fulltable)[1:2]
p <- upset(
fulltable,
inter,
annotations = list(
'smallRNA type' = (
ggplot(mapping = aes(fill = gene_biotype)) +
geom_bar(stat = 'count', position = 'fill') +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(
values = c(
'miRNA' = '#54693e',
'misc_RNA' = '#9c47cb',
'snRNA' = '#94d14f',
'snoRNA' = '#5c4f9c',
'scaRNA' = '#cca758',
'sRNA' = '#c85a90'
)
)
+ ylab('smallRNA proportion')
)
),
queries = list(
upset_query(
intersect = inter,
color = '#C19B5D',
fill = '#C19B5D',
only_components = c('intersections_matrix', 'Intersection size')
),
upset_query(
intersect = inter[1],
color = '#c6007b',
fill = '#c6007b',
only_components = c('intersections_matrix', 'Intersection size')
),
upset_query(
intersect = inter[2],
color = '#a0b600',
fill = '#a0b600',
only_components = c('intersections_matrix', 'Intersection size')
)
),
width_ratio = 0.4,
set_sizes = (
upset_set_size(geom = geom_bar(
aes(fill = gene_biotype, x = group),
width = 0.8
),
position = 'right') + theme(legend.position = "none") +
scale_fill_manual(
values = c(
'miRNA' = '#54693e',
'misc_RNA' = '#9c47cb',
'snRNA' = '#94d14f',
'snoRNA' = '#5c4f9c',
'scaRNA' = '#cca758',
'sRNA' = '#c85a90'
)
)
),
guides = 'over'
)
p
}Intersection plots
Upper Quartile
plotUpSet(quartile.data)Figure 9A: Gene expression greater than 75th percentile (intersection)
Save intersection results
filterUpsetRes <-
function(datatable, title = "title") {
data.annot <-
merge(datatable[1:2],
annot,
by.x = 0,
by.y = "ensembl_gene_id_version",
all.x = TRUE)
mirna.annot <- data.annot %>%
dplyr::filter(.[[2]] == 1 & .[[3]] == 1) %>%
dplyr::filter(gene_biotype == "miRNA")
snrna.annot <- data.annot %>%
dplyr::filter(.[[2]] == 1 & .[[3]] == 1)
write_delim(
mirna.annot,
file = paste0("results/intersection_miRNA_", title , ".tsv"),
delim = "\t"
)
write_delim(
snrna.annot,
file = paste0("results/intersection_smallRNA_", title , ".tsv"),
delim = "\t"
)
}
filterUpsetRes(quartile.data, title = "quartile")Target genes for miRNAs
Target genes for miRNAs was done manually using the miRSystem
server. The target genes and pathway genes were saved as
csv files. The intersection of the target genes between the
diff and undiff samples were plotted.
library(ggvenn)
setwd("/work/LAS/geetu-lab/arnstrm/mouse.trophoblast.smallRNAseq/target_genes")
files <-
list.files(path = '.', pattern = "^DE_up_(diff|undiff)_targetGenes.csv$")
lst <-
setNames(lapply(files, read.csv),
tools::file_path_sans_ext(basename(files)))
x <- lapply(lst, '[[', 1)
names(x) <- c("upregulated in pTGC", "upregulated in TSC")
ggvenn(
x,
fill_color = c("#c6007b", "#a0b600"),
stroke_size = 0.3,
set_name_size = 4
)Figure 10: Predicted mRNA targets of miRNAs expressed in pTGC/TSC samples (intersection)
To save the file:
cd target_genes
comm \
<(cut -f 1 -d "," DE_up-undiff_targetGenes.csv |\
sed 's/"//g' |\
grep -wv "TARGET_GENE" |\
sort)\
<(cut -f 1 -d "," DE_up-diff_targetGenes.csv |\
sed 's/"//g' |\
grep -wv "TARGET_GENE" |\
sort)
> comm_file.tsv
cut -f 1 comm_file.tsv |\
sort |\
uniq |\
awk 'NF==1' > DE_up_undiff_only_targetGenes.csv
cut -f 2 comm_file.tsv |\
sort |\
uniq |\
awk 'NF==1' > DE_up_diff_only_targetGenes.csv
cut -f 3 comm_file.tsv |\
sort |\
uniq |\
awk 'NF==1' > DE_up_undiff_and_diff_targetGenes.csvEnrichment analyses of target genes
setwd("/work/LAS/geetu-lab/arnstrm/mouse.trophoblast.smallRNAseq/target_genes")
source( "../assets/theme_clean.R")
library(TissueEnrich)
files <-
list.files(path = '.', pattern = ".*\\_targetGenes.csv$")
lst <-
setNames(lapply(files, read.csv),
tools::file_path_sans_ext(basename(files)))
gene.lsts <- lapply(lst, '[[', 1)
plotTE <- function(inputGenes = gene.list,
myColor = "color") {
gs <-
GeneSet(geneIds = inputGenes,
organism = "Mus Musculus",
geneIdType = SymbolIdentifier())
output <- teEnrichment(inputGenes = gs, rnaSeqDataset = 3)
en.output <-
setNames(data.frame(assay(output[[1]]),
row.names = rowData(output[[1]])[, 1]),
colData(output[[1]])[, 1])
en.output$Tissue <- rownames(en.output)
logp <- -log10(0.05)
en.output <-
mutate(en.output,
significance = ifelse(Log10PValue > logp,
"colored", "nocolor"))
en.output$Sig <- "NA"
ggplot(en.output, aes(reorder(Tissue, Log10PValue),
Log10PValue,
fill = significance)) +
geom_bar(stat = 'identity') +
theme_clean() + ylab("- log10 adj. p-value") + xlab("") +
scale_fill_manual(values = c("colored" = myColor, "nocolor" = "gray")) +
scale_y_continuous(expand = expansion(mult = c(0, .1)),
breaks = scales::pretty_breaks()) +
coord_flip()
}
plotTE.heatmap <-
function(inputGenes = gene.list,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
) {
gs <-
GeneSet(geneIds = inputGenes,
organism = "Mus Musculus",
geneIdType = SymbolIdentifier())
output <- teEnrichment(inputGenes = gs, rnaSeqDataset = 3)
en.output <-
setNames(data.frame(assay(output[[1]]),
row.names = rowData(output[[1]])[, 1]),
colData(output[[1]])[, 1])
en.output$Tissue <- rownames(en.output)
seExp <- output[[2]][[inputTissue]]
exp <-
setNames(data.frame(assay(seExp), row.names = rowData(seExp)[, 1]),
colData(seExp)[, 1])
exp <- head(exp[order(exp$`E14.5-Brain`, decreasing = TRUE), ], 25)
g <- pheatmap(
exp,
color = heat_colors,
cluster_rows = F,
cluster_cols = T,
show_rownames = GeneNames,
border_color = NA,
fontsize = 10,
scale = "row",
fontsize_row = 8
)
g
}
plotTE.heatmap.full <-
function(inputGenes = gene.list,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
) {
gs <-
GeneSet(geneIds = inputGenes,
organism = "Mus Musculus",
geneIdType = SymbolIdentifier())
output <- teEnrichment(inputGenes = gs, rnaSeqDataset = 3)
en.output <-
setNames(data.frame(assay(output[[1]]),
row.names = rowData(output[[1]])[, 1]),
colData(output[[1]])[, 1])
en.output$Tissue <- rownames(en.output)
seExp <- output[[2]][[inputTissue]]
exp <-
setNames(data.frame(assay(seExp), row.names = rowData(seExp)[, 1]),
colData(seExp)[, 1])
g <- pheatmap(
exp,
color = heat_colors,
cluster_rows = F,
cluster_cols = T,
show_rownames = GeneNames,
border_color = NA,
fontsize = 10,
scale = "row",
fontsize_row = 8
)
g
}
heatmap.genelst <-
function(inputGenes = gene.list,
inputTissue = "E14.5-Brain",
myCut = 4,
tableTitle = "A simple table") {
gs <-
GeneSet(geneIds = inputGenes,
organism = "Mus Musculus",
geneIdType = SymbolIdentifier())
output <- teEnrichment(inputGenes = gs, rnaSeqDataset = 3)
en.output <-
setNames(data.frame(assay(output[[1]]),
row.names = rowData(output[[1]])[, 1]),
colData(output[[1]])[, 1])
en.output$Tissue <- rownames(en.output)
seExp <- output[[2]][[inputTissue]]
exp <-
setNames(data.frame(assay(seExp), row.names = rowData(seExp)[, 1]),
colData(seExp)[, 1])
gl <-
dplyr::select(exp, `E14.5-Brain`) %>%
mutate(cutN = ntile(`E14.5-Brain`, myCut)) %>%
filter(cutN == myCut) %>%
rownames
gl
}Enrichment plots (TissueEnrich)
pTGC
plotTE(gene.lsts$DE_up_diff_targetGenes, myColor = "#c6007b")Figure 11A: Target genes of upregulated miRNAs in pTGC, enrichment tested against Mouse ENCODE data
TSC
plotTE(gene.lsts$DE_up_undiff_targetGenes, myColor = "#a0b600")Figure 11B: Target genes of upregulated miRNAs in TSC, enrichment tested against Mouse ENCODE data
pTGC & TSC
plotTE(gene.lsts$DE_up_undiff_and_diff_targetGenes, myColor = "#C19B5D")Figure 11C: Overlapping target genes of miRNAs upregulated in both pTGC and TSC, enrichment tested against Mouse ENCODE data
pTGC ONLY
plotTE(gene.lsts$DE_up_diff_only_targetGenes, myColor = "#c6007b")Figure 11D: Target genes of miRNAs found only in upregulated miRNAs of pTGC, enrichment tested against Mouse ENCODE data
TSC ONLY
plotTE(gene.lsts$DE_up_undiff_only_targetGenes, myColor = "#a0b600")Figure 11E: Target genes of miRNAs found only in upregulated miRNAs of TSC, Mouse ENCODE data
pTGC & TSC quartile
plotTE(gene.lsts$Top_Quartile_Exp_targetGenes, myColor = "#f53d00")Figure 11F: Target genes of upper quartile miRNAs expressed in pTGC and TSC, enrichment tested against Mouse ENCODE data
Enrichment Heatmaps for E14.5-Brain (top 25, with gene names)
pTGC
plotTE.heatmap(gene.lsts$DE_up_diff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE)Figure 12A: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test performed with target genes (top 25 shown) of upregulated miRNAs in pTGC
TSC
plotTE.heatmap(gene.lsts$DE_up_undiff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE)Figure 12B: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test performed with target genes (top 25 shown) of upregulated miRNAs in TSC
pTGC & TSC
plotTE.heatmap(
gene.lsts$DE_up_undiff_and_diff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE
)Figure 12C: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the overlapping target genes (top 25 shown) of miRNAs upregulated in both pTGC and TSC
pTGC ONLY
plotTE.heatmap(
gene.lsts$DE_up_diff_only_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE
)Figure 12D: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes (top 25 shown) of miRNAs found only in pTGC
TSC ONLY
plotTE.heatmap(
gene.lsts$DE_up_undiff_only_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE
)Figure 12E: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes (top 25 shown) of miRNAs found only in TSC
pTGC & TSC quartile
plotTE.heatmap(
gene.lsts$Top_Quartile_Exp_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = TRUE
)Figure 12F: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes (top 25 shown) of upper quartile miRNAs expressed in pTGC and TSC
Enrichment Heatmaps for E14.5-Brain (all, without gene names)
pTGC
plotTE.heatmap.full(gene.lsts$DE_up_diff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE)Figure 13A: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test performed with target genes of upregulated miRNAs in pTGC
TSC
plotTE.heatmap.full(gene.lsts$DE_up_undiff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE)Figure 13B: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test performed with target genes of upregulated miRNAs in TSC
pTGC & TSC
plotTE.heatmap.full(
gene.lsts$DE_up_undiff_and_diff_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
)Figure 13C: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the overlapping target genes of miRNAs upregulated in both pTGC and TSC
pTGC ONLY
plotTE.heatmap.full(
gene.lsts$DE_up_diff_only_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
)Figure 13D: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes of miRNAs found only in pTGC
TSC ONLY
plotTE.heatmap.full(
gene.lsts$DE_up_undiff_only_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
)Figure 13E: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes of miRNAs found only in TSC
pTGC & TSC quartile
plotTE.heatmap.full(
gene.lsts$Top_Quartile_Exp_targetGenes,
inputTissue = "E14.5-Brain",
GeneNames = FALSE
)Figure 13F: Enrichment Heatmaps for E14.5-Brain tissue from the enrichment test using the target genes of upper quartile miRNAs expressed in pTGC and TSC
MultiQC report:
MultiQC report is available at this link
Session Information
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] grid stats4 stats graphics grDevices utils datasets
#> [8] methods base
#>
#> other attached packages:
#> [1] TissueEnrich_1.16.0 GSEABase_1.58.0
#> [3] graph_1.74.0 annotate_1.74.0
#> [5] XML_3.99-0.10 AnnotationDbi_1.58.0
#> [7] ensurer_1.1 ggvenn_0.1.9
#> [9] splitstackshape_1.4.8 ComplexUpset_1.3.3
#> [11] PupillometryR_0.0.4 rlang_1.0.4
#> [13] reshape2_1.4.4 biomaRt_2.52.0
#> [15] ggrepel_0.9.1 genefilter_1.78.0
#> [17] pheatmap_1.0.12 RColorBrewer_1.1-3
#> [19] DESeq2_1.36.0 SummarizedExperiment_1.26.1
#> [21] Biobase_2.56.0 MatrixGenerics_1.8.1
#> [23] matrixStats_0.62.0 GenomicRanges_1.48.0
#> [25] GenomeInfoDb_1.32.2 IRanges_2.30.1
#> [27] S4Vectors_0.34.0 BiocGenerics_0.42.0
#> [29] plotly_4.10.0 scales_1.2.0
#> [31] forcats_0.5.1 stringr_1.4.0
#> [33] dplyr_1.0.9 purrr_0.3.4
#> [35] readr_2.1.2 tidyr_1.2.0
#> [37] tibble_3.1.8 ggplot2_3.3.6
#> [39] tidyverse_1.3.2 knitr_1.39
#>
#> loaded via a namespace (and not attached):
#> [1] googledrive_2.0.0 colorspace_2.0-3 ellipsis_0.3.2
#> [4] XVector_0.36.0 fs_1.5.2 rstudioapi_0.13
#> [7] farver_2.1.1 bit64_4.0.5 fansi_1.0.3
#> [10] lubridate_1.8.0 xml2_1.3.3 codetools_0.2-18
#> [13] splines_4.2.1 cachem_1.0.6 geneplotter_1.74.0
#> [16] jsonlite_1.8.0 broom_1.0.0 dbplyr_2.2.1
#> [19] png_0.1-7 compiler_4.2.1 httr_1.4.3
#> [22] backports_1.4.1 assertthat_0.2.1 Matrix_1.4-1
#> [25] fastmap_1.1.0 lazyeval_0.2.2 gargle_1.2.0
#> [28] cli_3.3.0 prettyunits_1.1.1 htmltools_0.5.3
#> [31] tools_4.2.1 gtable_0.3.0 glue_1.6.2
#> [34] GenomeInfoDbData_1.2.8 rappdirs_0.3.3 Rcpp_1.0.9
#> [37] cellranger_1.1.0 jquerylib_0.1.4 vctrs_0.4.1
#> [40] Biostrings_2.64.1 crosstalk_1.2.0 xfun_0.31
#> [43] rvest_1.0.2 lifecycle_1.0.1 googlesheets4_1.0.0
#> [46] zlibbioc_1.42.0 vroom_1.5.7 hms_1.1.1
#> [49] parallel_4.2.1 curl_4.3.2 yaml_2.3.5
#> [52] memoise_2.0.1 sass_0.4.2 stringi_1.7.8
#> [55] RSQLite_2.2.15 highr_0.9 filelock_1.0.2
#> [58] BiocParallel_1.30.3 pkgconfig_2.0.3 bitops_1.0-7
#> [61] evaluate_0.15 lattice_0.20-45 patchwork_1.1.1
#> [64] htmlwidgets_1.5.4 labeling_0.4.2 bit_4.0.4
#> [67] tidyselect_1.1.2 plyr_1.8.7 magrittr_2.0.3
#> [70] bookdown_0.27 R6_2.5.1 generics_0.1.3
#> [73] DelayedArray_0.22.0 DBI_1.1.3 pillar_1.8.0
#> [76] haven_2.5.0 withr_2.5.0 survival_3.3-1
#> [79] KEGGREST_1.36.3 RCurl_1.98-1.8 modelr_0.1.8
#> [82] crayon_1.5.1 utf8_1.2.2 BiocFileCache_2.4.0
#> [85] tzdb_0.3.0 rmarkdown_2.14 progress_1.2.2
#> [88] locfit_1.5-9.6 readxl_1.4.0 data.table_1.14.2
#> [91] blob_1.2.3 rmdformats_1.0.4 reprex_2.0.1
#> [94] digest_0.6.29 xtable_1.8-4 munsell_0.5.0
#> [97] viridisLite_0.4.0 bslib_0.4.0